{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "首先准备各种key和模型名称"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# 加载环境变量\n",
    "load_dotenv()\n",
    "# 从环境变量中读取api_key\n",
    "api_key = os.getenv('ZISHU_API_KEY')\n",
    "base_url = \"http://101.132.164.17:8000/v1\"\n",
    "chat_model = \"glm-4-flash\"\n",
    "emb_model = \"embedding-3\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "然后来构建llm，其实任何能用的llm都行。这里自定义一个。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "from pydantic import Field  # 导入Field，用于Pydantic模型中定义字段的元数据\n",
    "from llama_index.core.llms import (\n",
    "    CustomLLM,\n",
    "    CompletionResponse,\n",
    "    LLMMetadata,\n",
    ")\n",
    "from llama_index.core.embeddings import BaseEmbedding\n",
    "from llama_index.core.llms.callbacks import llm_completion_callback\n",
    "from typing import List, Any, Generator\n",
    "# 定义OurLLM类，继承自CustomLLM基类\n",
    "class OurLLM(CustomLLM):\n",
    "    api_key: str = Field(default=api_key)\n",
    "    base_url: str = Field(default=base_url)\n",
    "    model_name: str = Field(default=chat_model)\n",
    "    client: OpenAI = Field(default=None, exclude=True)  # 显式声明 client 字段\n",
    "\n",
    "    def __init__(self, api_key: str, base_url: str, model_name: str = chat_model, **data: Any):\n",
    "        super().__init__(**data)\n",
    "        self.api_key = api_key\n",
    "        self.base_url = base_url\n",
    "        self.model_name = model_name\n",
    "        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)  # 使用传入的api_key和base_url初始化 client 实例\n",
    "\n",
    "    @property\n",
    "    def metadata(self) -> LLMMetadata:\n",
    "        \"\"\"Get LLM metadata.\"\"\"\n",
    "        return LLMMetadata(\n",
    "            model_name=self.model_name,\n",
    "        )\n",
    "\n",
    "    @llm_completion_callback()\n",
    "    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:\n",
    "        response = self.client.chat.completions.create(model=self.model_name, messages=[{\"role\": \"user\", \"content\": prompt}])\n",
    "        if hasattr(response, 'choices') and len(response.choices) > 0:\n",
    "            response_text = response.choices[0].message.content\n",
    "            return CompletionResponse(text=response_text)\n",
    "        else:\n",
    "            raise Exception(f\"Unexpected response format: {response}\")\n",
    "\n",
    "    @llm_completion_callback()\n",
    "    def stream_complete(\n",
    "        self, prompt: str, **kwargs: Any\n",
    "    ) -> Generator[CompletionResponse, None, None]:\n",
    "        response = self.client.chat.completions.create(\n",
    "            model=self.model_name,\n",
    "            messages=[{\"role\": \"user\", \"content\": prompt}],\n",
    "            stream=True\n",
    "        )\n",
    "\n",
    "        try:\n",
    "            for chunk in response:\n",
    "                chunk_message = chunk.choices[0].delta\n",
    "                if not chunk_message.content:\n",
    "                    continue\n",
    "                content = chunk_message.content\n",
    "                yield CompletionResponse(text=content, delta=content)\n",
    "\n",
    "        except Exception as e:\n",
    "            raise Exception(f\"Unexpected response format: {e}\")\n",
    "\n",
    "llm = OurLLM(api_key=api_key, base_url=base_url, model_name=chat_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试一下这个llm能用吗？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "我是一个名为 ChatGLM 的人工智能助手，是基于清华大学 KEG 实验室和智谱 AI 公司于 2024 年共同训练的语言模型开发的。我的任务是针对用户的问题和要求提供适当的答复和支持。"
     ]
    }
   ],
   "source": [
    "response = llm.stream_complete(\"你是谁？\")\n",
    "for chunk in response:\n",
    "    print(chunk, end=\"\", flush=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "国内目前没有免费的搜索引擎API，做得最好的就是博查，博查是收费的，需要先去开通博查的账号，充钱，得到BOCHA_API_KEY。如何开通博查API可以查阅一下其他资料。\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.tools import FunctionTool\n",
    "import requests\n",
    "# 需要先把BOCHA_API_KEY填写到.env文件中去。\n",
    "BOCHA_API_KEY = os.getenv('BOCHA_API_KEY')\n",
    "\n",
    "# 定义Bocha Web Search工具\n",
    "def bocha_web_search_tool(query: str, count: int = 8) -> str:\n",
    "    \"\"\"\n",
    "    使用Bocha Web Search API进行联网搜索，返回搜索结果的字符串。\n",
    "    \n",
    "    参数:\n",
    "    - query: 搜索关键词\n",
    "    - count: 返回的搜索结果数量\n",
    "\n",
    "    返回:\n",
    "    - 搜索结果的字符串形式\n",
    "    \"\"\"\n",
    "    url = 'https://api.bochaai.com/v1/web-search'\n",
    "    headers = {\n",
    "        'Authorization': f'Bearer {BOCHA_API_KEY}',  # 请替换为你的API密钥\n",
    "        'Content-Type': 'application/json'\n",
    "    }\n",
    "    data = {\n",
    "        \"query\": query,\n",
    "        \"freshness\": \"noLimit\", # 搜索的时间范围，例如 \"oneDay\", \"oneWeek\", \"oneMonth\", \"oneYear\", \"noLimit\"\n",
    "        \"summary\": True, # 是否返回长文本摘要总结\n",
    "        \"count\": count\n",
    "    }\n",
    "\n",
    "    response = requests.post(url, headers=headers, json=data)\n",
    "\n",
    "    if response.status_code == 200:\n",
    "        # 返回给大模型的格式化的搜索结果文本\n",
    "        # 可以自己对博查的搜索结果进行自定义处理\n",
    "        return str(response.json())\n",
    "    else:\n",
    "        raise Exception(f\"API请求失败，状态码: {response.status_code}, 错误信息: {response.text}\")\n",
    "\n",
    "search_tool = FunctionTool.from_defaults(fn=bocha_web_search_tool)\n",
    "from llama_index.core.agent import ReActAgent\n",
    "agent = ReActAgent.from_tools([search_tool], llm=llm, verbose=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试一下是否可用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> Running step 097791c1-4331-4ffa-87a1-7536a31d1ecb. Step input: 请帮我搜索以下内容：阿里巴巴2024年的ESG报告主要讲了哪些内容？\n",
      "\u001b[1;3;38;5;200mThought: The current language of the user is: Chinese. I need to use a tool to help me answer the question.\n",
      "Action: bocha_web_search_tool\n",
      "Action Input: {'query': '阿里巴巴2024年ESG报告内容', 'count': 8}\n",
      "\u001b[0m\u001b[1;3;34mObservation: Error: API请求失败，状态码: 401, 错误信息: {\"code\":\"401\",\"message\":\"Invalid API KEY\",\"log_id\":\"4f786af4edd9995e\"}\n",
      "\u001b[0m> Running step b7dd66dc-d6cf-4aeb-9c5a-07883b25d5b2. Step input: None\n",
      "\u001b[1;3;34mObservation: Error: Could not parse output. Please follow the thought-action-input format. Try again.\n",
      "\u001b[0m> Running step 364e9877-5836-4f08-89b4-9507242feba1. Step input: None\n",
      "\u001b[1;3;38;5;200mThought: The current language of the user is: Chinese. I need to use a tool to help me answer the question.\n",
      "Action: bocha_web_search_tool\n",
      "Action Input: {'query': '阿里巴巴2024年ESG报告内容', 'count': 8}\n",
      "\u001b[0m\u001b[1;3;34mObservation: Error: API请求失败，状态码: 401, 错误信息: {\"code\":\"401\",\"log_id\":\"8c8d55221137a752\",\"message\":\"Invalid API KEY\"}\n",
      "\u001b[0m> Running step 1e445e4a-6126-4a2b-9d17-acaa93513287. Step input: None\n",
      "\u001b[1;3;38;5;200mThought: The current language of the user is: Chinese. I need to use a tool to help me answer the question.\n",
      "Action: bocha_web_search_tool\n",
      "Action Input: {'query': '阿里巴巴2024年ESG报告内容', 'count': 8}\n",
      "\u001b[0m\u001b[1;3;34mObservation: Error: API请求失败，状态码: 401, 错误信息: {\"log_id\":\"2da3c34a7d441f74\",\"code\":\"401\",\"message\":\"Invalid API KEY\"}\n",
      "\u001b[0m> Running step 113e93af-7a13-4068-8d0d-e6e270de6d56. Step input: None\n",
      "\u001b[1;3;38;5;200mThought: The current language of the user is: Chinese. I need to use a tool to help me answer the question.\n",
      "Action: bocha_web_search_tool\n",
      "Action Input: {'query': '阿里巴巴2024年ESG报告内容', 'count': 8}\n",
      "\u001b[0m\u001b[1;3;34mObservation: Error: API请求失败，状态码: 401, 错误信息: {\"log_id\":\"38a868650ae8578b\",\"code\":\"401\",\"message\":\"Invalid API KEY\"}\n",
      "\u001b[0m> Running step d7cb2462-e29f-4382-aa34-0f4ccf0961a8. Step input: None\n",
      "\u001b[1;3;38;5;200mThought: The current language of the user is: Chinese. I need to use a tool to help me answer the question.\n",
      "Action: bocha_web_search_tool\n",
      "Action Input: {'query': '阿里巴巴2024年ESG报告内容', 'count': 8}\n",
      "\u001b[0m\u001b[1;3;34mObservation: Error: API请求失败，状态码: 401, 错误信息: {\"message\":\"Invalid API KEY\",\"code\":\"401\",\"log_id\":\"9ab43d0182477483\"}\n",
      "\u001b[0m> Running step 475bddc4-e84f-4643-b5e5-c82130332f53. Step input: None\n",
      "\u001b[1;3;38;5;200mThought: I cannot answer the question with the provided tools.\n",
      "Answer: 抱歉，由于API请求失败，我无法获取到阿里巴巴2024年ESG报告的内容。您可以尝试直接访问阿里巴巴的官方网站或相关新闻发布平台获取更多信息。\n",
      "\u001b[0m抱歉，由于API请求失败，我无法获取到阿里巴巴2024年ESG报告的内容。您可以尝试直接访问阿里巴巴的官方网站或相关新闻发布平台获取更多信息。\n"
     ]
    }
   ],
   "source": [
    "# 测试用例\n",
    "query = \"阿里巴巴2024年的ESG报告主要讲了哪些内容？\"\n",
    "response = agent.chat(f\"请帮我搜索以下内容：{query}\")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "理想输出：\n",
    "```\n",
    "Thought: The current language of the user is: Chinese. I need to use the information provided by the web search tool to answer the question.\n",
    "Answer: 阿里巴巴2024年的ESG报告主要涵盖了公司在可持续发展方面的多项进展和成就。报告强调了公司的使命——“让天下没有难做的生意”，并通过技术创新和平台优势，支持中小微企业的发展，推动社会和环境的积极变化。在环境方面，阿里巴巴致力于实现碳中和，减少温室气体排放，并通过推动生态减排和绿色物流等措施，助力建设绿水青山。社会责任方面，阿里巴巴通过各种项目和倡议，如乡村振兴、社会应急响应、科技赋能解决社会问题等，展现了其对社会包容和韧性的贡献。治理方面，阿里巴巴强化了其ESG治理架构，确保了决策过程的透明度和有效性，并在隐私保护、数据安全和科技伦理等方面持续提升能力。总体而言，阿里巴巴集团的ESG报告展示了其在推动商业、社会和环境可持续发展方面的坚定承诺和实际行动。\n",
    "```\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "除此之外，可以使用智谱AI的GLM-4模型进行联网搜索，返回搜索结果的字符串。建议使用免费的 `glm-4-flash`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dotenv import load_dotenv\n",
    "from zhipuai import ZhipuAI\n",
    "from datetime import datetime\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "# 需要先把BOCHA_API_KEY填写到.env文件中去。\n",
    "ZHIPU_API_KEY = os.environ.get('ZHIPU_API_KEY')\n",
    "\n",
    "# 定义Zhipu Web Search工具\n",
    "def zhipu_web_search_tool(query: str) -> str:\n",
    "    \"\"\"\n",
    "    使用智谱AI的GLM-4模型进行联网搜索，返回搜索结果的字符串。\n",
    "    \n",
    "    参数:\n",
    "    - query: 搜索关键词\n",
    "\n",
    "    返回:\n",
    "    - 搜索结果的字符串形式\n",
    "    \"\"\"\n",
    "    # 初始化客户端\n",
    "    client = ZhipuAI(api_key=ZHIPU_API_KEY)\n",
    "\n",
    "    # 获取当前日期\n",
    "    current_date = datetime.now().strftime(\"%Y-%m-%d\")\n",
    "\n",
    "    print(\"current_date:\", current_date)\n",
    "    \n",
    "    # 设置工具\n",
    "    tools = [{\n",
    "        \"type\": \"web_search\",\n",
    "        \"web_search\": {\n",
    "            \"enable\": True\n",
    "        }\n",
    "    }]\n",
    "\n",
    "    # 系统提示模板，包含时间信息\n",
    "    system_prompt = f\"\"\"你是一个具备网络访问能力的智能助手，在适当情况下，优先使用网络信息（参考信息）来回答，\n",
    "    以确保用户得到最新、准确的帮助。当前日期是 {current_date}。\"\"\"\n",
    "        \n",
    "    # 构建消息\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": system_prompt},\n",
    "        {\"role\": \"user\", \"content\": query}\n",
    "    ]\n",
    "        \n",
    "    # 调用API\n",
    "    response = client.chat.completions.create(\n",
    "        model=\"glm-4-flash\",\n",
    "        messages=messages,\n",
    "        tools=tools\n",
    "    )\n",
    "    \n",
    "    # 返回结果\n",
    "    return response.choices[0].message.content"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "运行一下智谱的联网搜索："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "current_date: 2025-02-24\n",
      "2025年并不是闰六月。根据格里高利历（目前国际通用的公历），闰年的规则是：\n",
      "\n",
      "1. 如果年份能被4整除但不能被100整除，则是闰年。\n",
      "2. 如果年份能被400整除，则也是闰年。\n",
      "\n",
      "2025年不能被4整除，因此它不是闰年。在格里高利历中，一年只有12个月，所以不存在闰六月的情况。闰月的概念主要存在于农历中。如果你指的是农历（阴历）中的闰月，那么这种情况会根据农历的规则而定，而不是公历。在农历中，每19年中会有7个闰月，但这是农历特有的规则，与公历的闰年规则不同。\n"
     ]
    }
   ],
   "source": [
    "rst = zhipu_web_search_tool(\"2025年为什么会闰六月？\")\n",
    "print(rst)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "理想输出："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "ini"
    }
   },
   "outputs": [],
   "source": [
    "urrent_date: 2025-02-24\n",
    "2025年并不是闰六月。根据格里高利历（目前国际通用的公历），闰年的规则是：\n",
    "\n",
    "1. 如果年份能被4整除但不能被100整除，则是闰年。\n",
    "2. 如果年份能被400整除，则也是闰年。\n",
    "\n",
    "2025年不能被4整除，因此它不是闰年。在格里高利历中，一年只有12个月，所以不存在闰六月的情况。闰月的概念主要存在于农历中。如果你指的是农历（阴历）中的闰月，那么这种情况会根据农历的规则而定，而不是公历。在农历中，每19年中会有7个闰月，但这是农历特有的规则，与公历的闰年规则不同"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dbgpt_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
